home *** CD-ROM | disk | FTP | other *** search
/ IRIX Base Documentation 2002 November / SGI IRIX Base Documentation 2002 November.iso / usr / share / catman / p_man / cat3 / shmem_max.z / shmem_max
Encoding:
Text File  |  2002-10-03  |  20.8 KB  |  256 lines

  1.  
  2. SHMEM_MAX(3)                                                   SHMEM_MAX(3)
  3.  
  4.  
  5. NAME
  6.      shmem_double_max_to_all, shmem_float_max_to_all, shmem_int_max_to_all,
  7.      shmem_int4_max_to_all, shmem_int8_max_to_all, shmem_long_max_to_all,
  8.      shmem_longdouble_max_to_all, shmem_longlong_max_to_all,
  9.      shmem_real4_max_to_all, shmem_real8_max_to_all,
  10.      shmem_real16_max_to_all, shmem_short_max_to_all - Performs a maximum
  11.      function reduction across a set of processing elements (PEs)
  12.  
  13. SYNOPSIS
  14.      C or C++:
  15.  
  16.         #include <mpp/shmem.h>
  17.  
  18.         void shmem_double_max_to_all(double *target, double *source, int
  19.         nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk,
  20.         long *pSync);
  21.  
  22.         void shmem_float_max_to_all(float *target, float *source, int
  23.         nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk,
  24.         long *pSync);
  25.  
  26.         void shmem_int_max_to_all(int *target, int *source, int nreduce,
  27.         int PE_start, int logPE_stride, int PE_size, int *pWrk, long
  28.         *pSync);
  29.  
  30.         void shmem_long_max_to_all(long *target, long *source, int nreduce,
  31.         int PE_start, int logPE_stride, int PE_size, long *pWrk, long
  32.         *pSync);
  33.  
  34.         void shmem_longdouble_max_to_all(long double *target, long double
  35.         *source, int nreduce, int PE_start, int logPE_stride, int PE_size,
  36.         long double *pWrk, long *pSync);
  37.  
  38.         void shmem_longlong_max_to_all(long long *target, long long
  39.         *source, int nreduce, int PE_start, int logPE_stride, int PE_size,
  40.         long long *pWrk, long *pSync);
  41.  
  42.         void shmem_short_max_to_all(short *target, short *source, int
  43.         nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk,
  44.         long *pSync);
  45.  
  46.      Fortran:
  47.  
  48.         INCLUDE "mpp/shmem.fh"
  49.  
  50.         INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
  51.         INTEGER nreduce, PE_start, logPE_stride, PE_size
  52.  
  53.         CALL SHMEM_INT4_MAX_TO_ALL(target, source, nreduce, PE_start,
  54.         logPE_stride, PE_size, pWrk, pSync)
  55.  
  56.         CALL SHMEM_INT8_MAX_TO_ALL(target, source, nreduce, PE_start,
  57.         logPE_stride, PE_size, pWrk, pSync)
  58.  
  59.         CALL SHMEM_REAL4_MAX_TO_ALL(target, source, nreduce, PE_start,
  60.         logPE_stride, PE_size, pWrk, pSync)
  61.  
  62.         CALL SHMEM_REAL8_MAX_TO_ALL(target, source, nreduce, PE_start,
  63.         logPE_stride, PE_size, pWrk, pSync)
  64.  
  65.         CALL SHMEM_REAL16_MAX_TO_ALL(target, source, nreduce, PE_start,
  66.         logPE_stride, PE_size, pWrk, pSync)
  67.  
  68. DESCRIPTION
  69.      The shared memory (SHMEM) reduction routines compute one or more
  70.      reductions across symmetric arrays on multiple virtual PEs.  A
  71.      reduction performs an associative binary operation across a set of
  72.      values.  For a list of other SHMEM reduction routines, see
  73.      intro_shmem(3).
  74.  
  75.      As with all SHMEM collective routines, each of these routines assumes
  76.      that only PEs in the active set call the routine.  If a PE not in the
  77.      active set calls a SHMEM collective routine, undefined behavior
  78.      results.
  79.  
  80.      The nreduce argument determines the number of separate reductions to
  81.      perform.  The source array on all PEs in the active set provides one
  82.      element for each reduction.  The results of the reductions are placed
  83.      in the target array on all PEs in the active set.  The active set is
  84.      defined by the PE_start, logPE_stride, PE_size triplet.
  85.  
  86.      The source and target arrays may be the same array, but they may not
  87.      be overlapping arrays.
  88.  
  89.      The arguments are as follows:
  90.  
  91.      target       A symmetric array of length nreduce elements to receive
  92.                   the results of the reduction operations.
  93.  
  94.                   The data type of target varies with the version of the
  95.                   reduction routine being called.  When calling from C,
  96.                   refer to the SYNOPSIS section for data type information.
  97.                   When calling from Fortran, the target data types are as
  98.                   follows:
  99.  
  100.                   Routine                       Data Type
  101.  
  102.                   shmem_real8_max_to_all        Real, with an element size
  103.                                                 of 8 bytes
  104.  
  105.  
  106.                   shmem_int4_max_to_all         Integer, with an element
  107.                                                 size of 4 bytes
  108.  
  109.                   shmem_int8_max_to_all         Integer, with an element
  110.                                                 size of 8 bytes
  111.  
  112.                   shmem_real4_max_to_all        Real, with an element size
  113.                                                 of 4 bytes
  114.  
  115.                   shmem_real16_max_to_all       Real, with an element size
  116.                                                 of 16 bytes
  117.  
  118.      source       A symmetric array of length nreduce elements that
  119.                   contains one element for each separate reduction
  120.                   operation.  The source argument must have the same data
  121.                   type as target.
  122.  
  123.      nreduce      The number of elements in the target and source arrays.
  124.                   nreduce must be of type integer.  If you are using
  125.                   Fortran, it must be a default integer value.
  126.  
  127.      PE_start     The lowest virtual PE number of the active set of PEs.
  128.                   PE_start must be of type integer.  If you are using
  129.                   Fortran, it must be a default integer value.
  130.  
  131.      logPE_stride The log (base 2) of the stride between consecutive
  132.                   virtual PE numbers in the active set.  logPE_stride must
  133.                   be of type integer.  If you are using Fortran, it must be
  134.                   a default integer value.
  135.  
  136.      PE_size      The number of PEs in the active set.  PE_size must be of
  137.                   type integer.  If you are using Fortran, it must be a
  138.                   default integer value.
  139.  
  140.      pWrk         A symmetric work array.  The pWrk argument must have the
  141.                   same data type as target.
  142.  
  143.                   In C/C++, this contains
  144.                   max(nreduce/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)
  145.                   elements.
  146.  
  147.                   In Fortran, this contains
  148.                   max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)
  149.                   elements.
  150.  
  151.      pSync        A symmetric work array.
  152.  
  153.                   In C/C++, pSync is of type long and size
  154.                   _SHMEM_REDUCE_SYNC_SIZE.
  155.  
  156.                   In Fortran, pSync is of type integer and size
  157.                   SHMEM_REDUCE_SYNC_SIZE.  If you are using Fortran, it
  158.                   must be a default integer value.
  159.  
  160.                   Every element of this array must be initialized with the
  161.                   value _SHMEM_SYNC_VALUE (in C/C++) or SHMEM_SYNC_VALUE
  162.                   (in Fortran) before any of the PEs in the active set
  163.                   enter the reduction routine.
  164.  
  165.      The values of arguments nreduce, PE_start, logPE_stride, and PE_size
  166.      must be equal on all PEs in the active set.  The same target and
  167.      source arrays, and the same pWrk and pSync work arrays, must be passed
  168.      to all PEs in the active set.
  169.  
  170.      Before any PE calls a reduction routine, you must ensure that the
  171.      following conditions exist (synchronization via a barrier or some
  172.      other method is often needed to ensure this):
  173.  
  174.      * The pWrk and pSync arrays on all PEs in the active set are not still
  175.        in use from a prior call to a collective SHMEM routine.
  176.  
  177.      * The target array on all PEs in the active set is ready to accept the
  178.        results of the reduction.
  179.  
  180.      Upon return from a reduction routine, the following are true for the
  181.      local PE:
  182.  
  183.      * The target array is updated.
  184.  
  185.      * The data cache region mapped to target is coherent.
  186.  
  187.      * The values in the pSync array are restored to the original values.
  188.  
  189. NOTES
  190.      The terms collective, symmetric, and cache aligned are defined in
  191.      intro_shmem(3).
  192.  
  193.      All SHMEM reduction routines reset the values in pSync before they
  194.      return, so a particular pSync buffer need only be initialized the
  195.      first time it is used.
  196.  
  197.      You must ensure that the pSync array is not being updated on any PE in
  198.      the active set while any of the PEs participate in processing of a
  199.      SHMEM reduction routine.  Be careful of the following situations:
  200.  
  201.      * If the pSync array is initialized at run time, some type of
  202.        synchronization is needed to ensure that all PEs in the active set
  203.        have initialized pSync before any of them enter a SHMEM routine
  204.        called with the pSync synchronization array.
  205.  
  206.      * A pSync or pWrk array can be reused in a subsequent reduction
  207.        routine call only if none of the PEs in the active set are still
  208.        processing a prior reduction routine call that used the same pSync
  209.        or pWrk arrays.  In general, this can be assured only by doing some
  210.        type of synchronization.  However, in the special case of reduction
  211.        routines being called with the same active set, you can allocate two
  212.        pSync and pWrk arrays and alternate between them on successive
  213.        calls.
  214.  
  215. EXAMPLES
  216.      Example 1:  This Fortran example statically initializes the pSync
  217.      array and finds the maximum value of real variable FOO across all even
  218.      PEs.
  219.  
  220.           INCLUDE "mpp/shmem.fh"
  221.  
  222.           INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
  223.           DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
  224.           PARAMETER (NR=1)
  225.           REAL FOO, FOOMAX, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
  226.           COMMON /COM/ FOO, FOOMAX, PWRK
  227.           INTRINSIC MY_PE
  228.  
  229.           IF ( MOD(MY_PE(),2) .EQ. 0) THEN
  230.              CALL SHMEM_REAL8_MAX_TO_ALL(FOOMAX, FOO, NR, 0, 1, N$PES/2,
  231.          &      PWRK, PSYNC)
  232.              PRINT*,'Result on PE ',MY_PE(),' is ',FOOMAX
  233.           ENDIF
  234.  
  235.      Example 2:  Consider the following C/C++ call:
  236.  
  237.           shmem_int_max_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
  238.  
  239.      The preceding call is more efficient than, but semantically equivalent
  240.      to, the combination of the following calls:
  241.  
  242.           shmem_int_max_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
  243.              pwrk1, psync1);
  244.           shmem_int_max_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
  245.              pwrk2, psync2);
  246.           shmem_int_max_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
  247.              pwrk1, psync1);
  248.  
  249.      Note that two sets of pWrk and pSync arrays are used alternately
  250.      because no synchronization is done between calls.
  251.  
  252. SEE ALSO
  253.      intro_shmem(3)
  254.  
  255.      Message Passing Toolkit: MPI Programmer's Manual
  256.